"""
# 对数变换实战 Yelp商家点评预测中输入和输出相关性可视化
"""

import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Yelp business dataset: the file is parsed as one JSON object per
# line. The context manager guarantees the handle is closed, and the explicit
# encoding keeps decoding independent of the platform's default locale.
with open('../数据集/yelp_academic_dataset_business.json', encoding='utf-8') as biz_file:
    # Iterate the file lazily instead of materializing every line first.
    biz_df = pd.DataFrame([json.loads(line) for line in biz_file])

sns.set_style('whitegrid')
fig, (ax_orig, ax_log) = plt.subplots(2, 1)

# Top panel: raw review count (input) against the star rating (output).
ax_orig.scatter(biz_df['review_count'], biz_df['stars'])
ax_orig.tick_params(labelsize=14)
ax_orig.set_xlabel('Review Count', fontsize=14)
# The y-axis shows the 'stars' column, not a frequency count.
ax_orig.set_ylabel('Average Star Rating', fontsize=14)

# Bottom panel: same relationship after a log10 transform of the input.
# The +1 offset keeps the logarithm defined when a review count is zero.
biz_df['log_review_count'] = np.log10(biz_df['review_count'] + 1)
ax_log.scatter(biz_df['log_review_count'], biz_df['stars'])
ax_log.tick_params(labelsize=14)
ax_log.set_xlabel('Log10(Review Count)', fontsize=14)
ax_log.set_ylabel('Average Star Rating', fontsize=14)

# Save before show(): some backends clear the figure once the window closes.
plt.savefig('./可视化/Yelp商家点评预测中输入和输出相关性可视化.png')
plt.show()
